Calls Predictions

In [92]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_error
from math import sqrt
import warnings
from tabulate import tabulate
warnings.simplefilter('ignore')

Loading the Dataset

In [6]:
df = pd.read_csv('ME.csv')
df.head(10)
Out[6]:
Fecha skill starttime ATEND ABANDO ANTED_20S
0 2023-01-01 1205 00:00:00 0 0 0
1 2023-01-01 1201 00:00:00 1 0 1
2 2023-01-01 1202 00:00:00 0 0 0
3 2023-01-01 1203 00:00:00 0 0 0
4 2023-01-01 1202 00:30:00 0 0 0
5 2023-01-01 1201 00:30:00 0 0 0
6 2023-01-01 1205 00:30:00 0 0 0
7 2023-01-01 1203 00:30:00 0 0 0
8 2023-01-01 1205 01:00:00 0 0 0
9 2023-01-01 1201 01:00:00 0 0 0

Check Dataset

In [7]:
df.isnull().sum()
Out[7]:
Fecha        0
skill        0
starttime    0
ATEND        0
ABANDO       0
ANTED_20S    0
dtype: int64
In [8]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41128 entries, 0 to 41127
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Fecha      41128 non-null  object
 1   skill      41128 non-null  int64 
 2   starttime  41128 non-null  object
 3   ATEND      41128 non-null  int64 
 4   ABANDO     41128 non-null  int64 
 5   ANTED_20S  41128 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 1.9+ MB
In [9]:
df.describe()
Out[9]:
skill ATEND ABANDO ANTED_20S
count 41128.000000 41128.000000 41128.000000 41128.000000
mean 1202.750000 5.249368 0.055874 4.737819
std 1.479038 8.682921 0.406371 7.825416
min 1201.000000 0.000000 0.000000 0.000000
25% 1201.750000 0.000000 0.000000 0.000000
50% 1202.500000 1.000000 0.000000 1.000000
75% 1203.500000 7.000000 0.000000 6.000000
max 1205.000000 92.000000 19.000000 79.000000
In [19]:
df.skew()
<ipython-input-19-9e0b1e29546f>:1: FutureWarning: The default value of numeric_only in DataFrame.skew is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.skew()
Out[19]:
skill         0.434667
ATEND         2.551994
ABANDO       16.079719
ANTED_20S     2.541835
dtype: float64

Groupby Dataset

In [149]:
df_reemplazado = df.replace({1205: 'A', 1201: 'B',1202: 'C',1203: 'D'})
df_g = df.groupby(['Fecha'])['ATEND', 'ABANDO', 'ANTED_20S'].sum()

Exploratory Data Analysis

In [125]:
fig, ax = plt.subplots(figsize=(20, 6))

sns.lineplot(x='Fecha', y='ATEND', data = df,ax=ax, label='Llamadas Atendidas')
sns.lineplot(x='Fecha', y='ABANDO', data = df,ax=ax, label='Llamadas Abandonadas')
sns.lineplot(x='Fecha', y='ANTED_20S', data = df,ax=ax, label = 'Llamadas Atendidas 20s')
plt.xlabel('Fecha')
plt.ylabel('Valor')
plt.xticks(rotation=90, ha='right', fontsize=8)

plt.show()
In [35]:
datos_agrupados_Atend = df_reemplazado.groupby(['Fecha', 'skill'])['ATEND'].sum().reset_index()
datos_agrupados_Abandon = df_reemplazado.groupby(['Fecha', 'skill'])['ABANDO'].sum().reset_index()
datos_agrupados_Atend20s = df_reemplazado.groupby(['Fecha', 'skill'])['ANTED_20S'].sum().reset_index()
In [126]:
fig, ax = plt.subplots(2, 3, figsize=(30, 10))

sns.lineplot(x='Fecha', y='ATEND', data=df, ax=ax[0,0])
sns.lineplot(x='Fecha', y='ABANDO', data=df, ax=ax[0,1])
sns.lineplot(x='Fecha', y='ANTED_20S', data=df, ax=ax[0,2])
sns.lineplot(x='Fecha', y='ATEND',hue='skill', data=datos_agrupados_Atend, ax=ax[1,0])
sns.lineplot(x='Fecha', y='ABANDO',hue='skill', data=datos_agrupados_Abandon, ax=ax[1,1])
sns.lineplot(x='Fecha', y='ANTED_20S',hue='skill', data=datos_agrupados_Atend20s, ax=ax[1,2])
Out[126]:
<Axes: xlabel='Fecha', ylabel='ANTED_20S'>
In [127]:
plt.figure(figsize=(10, 10))

sns.distplot(df['ATEND'], hist_kws={'color': 'blue', 'label': 'ATEND'})
sns.distplot(df['ABANDO'], hist_kws={'color': 'green', 'label': 'ABANDO'})
sns.distplot(df['ANTED_20S'], hist_kws={'color': 'red', 'label': 'ANTED_20S'})

plt.title("Histograma General")
plt.xlabel("Valores")
plt.ylabel("Frecuencia")
plt.show()
In [128]:
fig, ax = plt.subplots(1, 3, figsize=(20, 10))

sns.distplot(df[['ATEND']],hist_kws={'color': 'blue', 'label': 'ATEND'} ,ax=ax[0])
sns.distplot(df[['ABANDO']], hist_kws={'color': 'green', 'label': 'ABANDO'},ax=ax[1])
sns.distplot(df[['ANTED_20S']], hist_kws={'color': 'red', 'label': 'ANTED_20S'},ax=ax[2])
Out[128]:
<Axes: ylabel='Density'>

Improving the Grouping

In [129]:
fig, ax = plt.subplots(2, 3, figsize=(30, 10))

sns.distplot(df.groupby(['Fecha'])['ATEND'].sum(),hist_kws={'color': 'blue', 'label': 'ATEND'},ax=ax[0,0])
sns.distplot(df.groupby(['Fecha'])['ABANDO'].sum(),hist_kws={'color': 'green', 'label': 'ABANDO'},ax=ax[0,1])
sns.distplot(df.groupby(['Fecha'])['ANTED_20S'].sum(),hist_kws={'color': 'red', 'label': 'ANTED_20S'},ax=ax[0,2])
sns.histplot(data=datos_agrupados_Atend, x='ATEND', hue='skill', element='step', stat='density', kde = True, common_norm=False,ax=ax[1,0])
sns.histplot(data=datos_agrupados_Abandon, x='ABANDO', hue='skill', element='step', stat='density', kde = True,common_norm=False,ax=ax[1,1])
sns.histplot(data=datos_agrupados_Atend20s, x='ANTED_20S', hue='skill', element='step', stat='density',kde = True ,common_norm=False,ax=ax[1,2])

plt.show()
In [198]:
datos_agregados = df.groupby(['Fecha', 'skill'])['ATEND', 'ABANDO'].sum().reset_index()
df_reemplazado = datos_agregados.replace({1205: 'A', 1201: 'B',1202: 'C',1203: 'D'})

plt.figure(figsize=(20, 6))
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', kind = 'reg')
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', hue = 'skill')
sns.jointplot(data=df_reemplazado, x='ABANDO', y='ATEND', hue = 'skill', kind='kde')
g = sns.lmplot(x='ABANDO', y='ATEND', hue="skill",  truncate=True, height=5, data=df_reemplazado)
g.set_axis_labels("ABANDO", "ATEND")
Out[198]:
<seaborn.axisgrid.FacetGrid at 0x7e47f5eb0790>
<Figure size 2000x600 with 0 Axes>

Modeling

In [130]:
from statsmodels.tsa.arima.model import ARIMA

df_diff = df_g.diff().dropna()

fig, ax = plt.subplots(2, 2, figsize=(15, 10))

plot_acf(df_diff['ATEND'], lags=40, ax=ax[0,0])
plot_pacf(df_diff['ATEND'], lags=40, ax=ax[0,1])
plot_acf(df_diff['ABANDO'], lags=40, ax=ax[1,0])
plot_pacf(df_diff['ABANDO'], lags=40, ax=ax[1,1])
plt.show()
In [131]:
model = ARIMA(df_g['ATEND'],  order=(5, 1, 0))
model_2 = ARIMA(df_g['ABANDO'],  order=(2, 2, 1))

results_2 = model_2.fit()
results = model.fit()

summary_table_atend = results.summary()
summary_table_abnd = results_2.summary()

print("Resumen Atendidas")
print(tabulate(summary_table_atend.tables[1], headers='keys', tablefmt='fancy_grid'))
print("\n")
print("Resumen Abandonadas")
print(tabulate(summary_table_abnd.tables[1], headers='keys', tablefmt='fancy_grid'))
Resumen Atendidas
╒════════╤═══════════╤══════════╤════════╤═══════╤══════════╤══════════╕
│ 0      │ 1         │ 2        │ 3      │ 4     │ 5        │ 6        │
╞════════╪═══════════╪══════════╪════════╪═══════╪══════════╪══════════╡
│        │ coef      │ std err  │ z      │ P>|z| │ [0.025   │ 0.975]   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ ar.L1  │ -0.5337   │ 0.070    │ -7.637 │ 0.000 │ -0.671   │ -0.397   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ ar.L2  │ -0.5449   │ 0.079    │ -6.929 │ 0.000 │ -0.699   │ -0.391   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ ar.L3  │ -0.4749   │ 0.083    │ -5.743 │ 0.000 │ -0.637   │ -0.313   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ ar.L4  │ -0.4269   │ 0.104    │ -4.117 │ 0.000 │ -0.630   │ -0.224   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ ar.L5  │ -0.4442   │ 0.098    │ -4.545 │ 0.000 │ -0.636   │ -0.253   │
├────────┼───────────┼──────────┼────────┼───────┼──────────┼──────────┤
│ sigma2 │ 1.519e+05 │ 1.27e+04 │ 11.980 │ 0.000 │ 1.27e+05 │ 1.77e+05 │
╘════════╧═══════════╧══════════╧════════╧═══════╧══════════╧══════════╛


Resumen Abandonadas
╒════════╤══════════╤══════════╤═════════╤═══════╤═══════════╤══════════╕
│ 0      │ 1        │ 2        │ 3       │ 4     │ 5         │ 6        │
╞════════╪══════════╪══════════╪═════════╪═══════╪═══════════╪══════════╡
│        │ coef     │ std err  │ z       │ P>|z| │ [0.025    │ 0.975]   │
├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤
│ ar.L1  │ -0.6662  │ 0.036    │ -18.724 │ 0.000 │ -0.736    │ -0.596   │
├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤
│ ar.L2  │ -0.4657  │ 0.046    │ -10.056 │ 0.000 │ -0.556    │ -0.375   │
├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤
│ ma.L1  │ -0.9999  │ 6.020    │ -0.166  │ 0.868 │ -12.799   │ 10.799   │
├────────┼──────────┼──────────┼─────────┼───────┼───────────┼──────────┤
│ sigma2 │ 196.5610 │ 1181.872 │ 0.166   │ 0.868 │ -2119.865 │ 2512.987 │
╘════════╧══════════╧══════════╧═════════╧═══════╧═══════════╧══════════╛
In [132]:
fig, ax = plt.subplots(2, 3, figsize=(15, 10))

results.resid.plot(ax=ax[0,0], color='blue', title="Residuos Atendidas")
results_2.resid.plot(ax=ax[0,1], color='red', title="Residuos Abandonadas")

results.resid.plot(kind='kde',ax=ax[1,0], color='blue')
results_2.resid.plot(kind='kde',ax=ax[1,1], color='red')

results.resid.plot(ax=ax[0,2], color='blue', title="Residuos Atendidas vs Abandonadas")
results_2.resid.plot(ax=ax[0,2], color='red')

results.resid.plot(kind='kde',ax=ax[1,2],color='blue')
results_2.resid.plot(kind='kde',ax=ax[1,2],color='red')

plt.show()
In [133]:
resid_summary_atend_df = pd.DataFrame(results.resid.describe(), columns=['Residuos de Atendidas'])
resid_summary_abandon_df = pd.DataFrame(results_2.resid.describe(), columns=['Residuos de Abandonadas'])

print(tabulate(resid_summary_atend_df, headers='keys', tablefmt='fancy_grid'))
print("\n")
print(tabulate(resid_summary_abandon_df, headers='keys', tablefmt='fancy_grid'))
╒═══════╤═════════════════════════╕
│       │   Residuos de Atendidas │
╞═══════╪═════════════════════════╡
│ count │               217       │
├───────┼─────────────────────────┤
│ mean  │                 2.26026 │
├───────┼─────────────────────────┤
│ std   │               398.751   │
├───────┼─────────────────────────┤
│ min   │             -1155.63    │
├───────┼─────────────────────────┤
│ 25%   │              -110.231   │
├───────┼─────────────────────────┤
│ 50%   │                23.7796  │
├───────┼─────────────────────────┤
│ 75%   │               153.142   │
├───────┼─────────────────────────┤
│ max   │              1174.91    │
╘═══════╧═════════════════════════╛


╒═══════╤═══════════════════════════╕
│       │   Residuos de Abandonadas │
╞═══════╪═══════════════════════════╡
│ count │                 217       │
├───────┼───────────────────────────┤
│ mean  │                  -0.50933 │
├───────┼───────────────────────────┤
│ std   │                  14.3276  │
├───────┼───────────────────────────┤
│ min   │                 -40.3605  │
├───────┼───────────────────────────┤
│ 25%   │                  -5.83378 │
├───────┼───────────────────────────┤
│ 50%   │                  -1.91944 │
├───────┼───────────────────────────┤
│ 75%   │                   3.24653 │
├───────┼───────────────────────────┤
│ max   │                  94.0839  │
╘═══════╧═══════════════════════════╛
In [134]:
from statsmodels.tsa.arima.model import ARIMA

n_pasos = 10
modeler = ARIMA(df_g['ATEND'], order=(5, 1, 0))
results = modeler.fit()
forecast = results.forecast(steps=n_pasos)
prediccion = results.predict(start="2023-01-01", end="2023-08-09")

Plot Predictions

In [135]:
plt.figure(figsize=(25, 8))

plt.plot(df_g['ATEND'], label='Entrenamiento Atendidas')
plt.plot(df_g['ANTED_20S'], label='Entrenamiento Atendidas 20s')
sns.lineplot(prediccion, label='Predicción')
plt.plot(forecast, label='Predicción Futuras')

plt.legend()
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.show()
In [136]:
n_pasos = 10
modeler_2 = ARIMA(df_g['ABANDO'], order=(5, 0, 2))
results_2 = modeler_2.fit()
forecast_2 = results_2.forecast(steps=n_pasos)

prediccion_2 = results_2.predict(start="2023-01-01", end="2023-08-09")

plt.figure(figsize=(25, 8))

sns.lineplot(df_g['ABANDO'], label='Predicción de entrenamiento')
sns.lineplot(prediccion_2, label='Predicción de entrenamiento')
plt.plot(forecast_2, label='Predicción Futuras')
plt.legend()
plt.xticks(rotation=90, ha='right', fontsize=8)
plt.show()
In [137]:
fig, ax = plt.subplots(1, 2, figsize=(30, 10))

sns.distplot(prediccion,kde=True, color='blue', label='Atendidades Predicción',ax=ax[0])
sns.distplot(df.groupby(['Fecha'])['ATEND'].sum(),kde=True, color='red', label='Atendidades Reales',ax=ax[0])

sns.distplot(prediccion_2,kde=True, color='blue', label='Abandonadas Predicción',ax=ax[1])
sns.distplot(df.groupby(['Fecha'])['ABANDO'].sum(),kde=True, color='red', label='Abandonadas Reales',ax=ax[1])

ax[0].legend()
ax[1].legend()
plt.show()

Model Evaluation

In [138]:
mae = mean_absolute_error(df_g['ATEND'][-276:], prediccion)
mse = mean_squared_error(df_g['ATEND'][-276:], prediccion)
rmse = sqrt(mse)
mape = (abs((df_g['ATEND'][-n_pasos:] - prediccion) / df_g['ATEND'][-n_pasos:])).mean()


print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape * 100:.2f}%")
MAE: 277.42
MSE: 158275.05
RMSE: 397.84
MAPE: 59.26%
In [139]:
mae = mean_absolute_error(df_g['ABANDO'][-276:], prediccion)
mse = mean_squared_error(df_g['ABANDO'][-276:], prediccion)
rmse = sqrt(mse)
mape = (abs((df_g['ABANDO'][-276:] - prediccion) / df_g['ABANDO'][-276:])).mean()

print(f"MAE: {mae:.2f}")
print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape * 100:.2f}%")
MAE: 982.06
MSE: 1044841.41
RMSE: 1022.17
MAPE: inf%

Modeling to Skill

In [193]:
from statsmodels.tsa.arima.model import ARIMA

datos_agrupados = df_reemplazado.groupby(['Fecha', 'skill'])['ATEND'].sum().reset_index()
pivot_df = datos_agrupados.pivot(index='Fecha', columns='skill', values='ATEND').fillna(0)

n_pasos = 1

for causa in pivot_df.columns:
    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(5, 1, 0))
    resultado = modelo_arima.fit()
    plt.figure(figsize=(30, 6))
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
    plt.plot(pivot_df[causa], label=causa + ' - Datos reales')
    plt.plot(prediccion, label=causa + ' - Predicción de entrenamiento')
    plt.xticks(rotation=90, ha='right', fontsize=8)
    plt.legend()
    plt.show()
In [194]:
for causa in pivot_df.columns:
    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
    resultado = modelo_arima.fit()

    plt.figure(figsize=(30, 6))
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
    sns.histplot(pivot_df[causa], label=causa + ' - Datos reales', kde=True)
    sns.histplot(prediccion, label=causa + ' - Predicción de entrenamiento',kde=True)
    plt.xticks(rotation=90, ha='right', fontsize=8)
    plt.legend()
In [195]:
plt.figure(figsize=(30, 6))

for causa in pivot_df.columns:

    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
    resultado = modelo_arima.fit()
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])

    sns.lineplot(pivot_df[causa], label=causa + ' - Datos reales')
    sns.lineplot(prediccion, label=causa + ' - Predicción de entrenamiento')

plt.xticks(rotation=90, ha='right', fontsize=8)

plt.legend()

plt.show()
In [186]:
from statsmodels.tsa.arima.model import ARIMA

datos_agrupados_2 = df_reemplazado.groupby(['Fecha', 'skill'])['ABANDO'].sum().reset_index()
pivot_df = datos_agrupados_2.pivot(index='Fecha', columns='skill', values='ABANDO').fillna(0)

n_pasos = 1

for causa in pivot_df.columns:
    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
    resultado = modelo_arima.fit()

    plt.figure(figsize=(30, 6))
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
    plt.plot(pivot_df[causa], label=causa + ' - Datos reales')
    plt.plot(prediccion, label=causa + ' - Predicción de entrenamiento')
    plt.xticks(rotation=90, ha='right', fontsize=8)
    plt.legend()
In [143]:
for causa in pivot_df.columns:
    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
    resultado = modelo_arima.fit()

    plt.figure(figsize=(30, 6))
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])
    sns.histplot(pivot_df[causa], label=causa + ' - Datos reales', kde=True)
    sns.histplot(prediccion, label=causa + ' - Predicción de entrenamiento',kde=True)
    plt.xticks(rotation=90, ha='right', fontsize=8)
    plt.legend()
In [191]:
plt.figure(figsize=(30, 6))

for causa in pivot_df.columns:

    modelo_arima = ARIMA(pivot_df[causa].iloc[:-n_pasos], order=(2, 1, 0))
    resultado = modelo_arima.fit()
    prediccion = resultado.predict(start=pivot_df.index[0], end=pivot_df.index[-n_pasos-1])

    sns.lineplot(pivot_df[causa], label=causa + ' - Datos reales')
    sns.lineplot(prediccion, label=causa + ' - Predicción de entrenamiento')

plt.xticks(rotation=90, ha='right', fontsize=8)

plt.legend()

plt.show()